#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : 标记出一句话中的区域词.py
# @Author: dongguangwen
# @Date  : 2024-07-06 11:19

import cpca
import jieba.posseg as psg

# Sentences to annotate. Two alternative sample inputs are kept (commented out)
# for reference; only the active assignment below is used.
# content_list = ["秦皇岛氨水制备器", "天津奥利达氨水制备器", "张家口氨水制备器", "哈密速成工业氨水制备器", "阿勒泰速成工业氨水制备器"]
# content_list = ["杭州金东氨水制备器", "欧力诺氨水制备器图片"]
content_list = ["江西江氨科技有限公司氨水制备器", "欧力诺氨水制备器图片"]


def mark_area_words(tagged_words, area_str):
    """Rebuild a sentence from its segments, wrapping area words in ``[...]``.

    A segment is treated as an area word when it is at least two characters
    long and either carries the part-of-speech flag ``'ns'`` or occurs as a
    substring of ``area_str``.

    Working segment by segment (instead of calling ``str.replace`` on the
    whole sentence) marks each segment exactly once and in place, so a word
    that occurs several times is never bracketed twice (``[[word]]``).

    Args:
        tagged_words: Iterable of ``(word, flag)`` pairs in sentence order.
        area_str: Concatenation of the region names resolved for the sentence;
            may be empty when nothing was resolved.

    Returns:
        The sentence with every area word wrapped in square brackets.
    """
    pieces = []
    for word, flag in tagged_words:
        # Single characters are skipped: they are too ambiguous to mark.
        is_area = len(word) >= 2 and (flag == 'ns' or word in area_str)
        pieces.append(f"[{word}]" if is_area else word)
    # NOTE(review): assumes the segments concatenate back to the original
    # sentence, i.e. the segmenter keeps every character — confirm.
    return "".join(pieces)


result_list = []
for content in content_list:
    # Keep the first three columns of the cpca result; in the recorded output
    # below these look like province / city / (presumably) district, with None
    # for levels that were not resolved.
    area_words = cpca.transform([content]).values.tolist()[0][:3]
    # Drop unresolved (falsy) levels before joining into one lookup string.
    area_str = "".join(area for area in area_words if area)
    print(area_words)
    words_flag = psg.lcut(content)
    print(words_flag)
    result_list.append(mark_area_words(words_flag, area_str))

print(result_list)

# Recorded console output of an earlier run (sentences 秦皇岛 / 天津 / 张家口).
# Kept as a raw string so the backslashes in the Windows path are not parsed
# as (invalid) escape sequences.
r"""
['河北省', '秦皇岛市', None]
Building prefix dict from the default dictionary ...
Loading model from cache C:Users\86187\AppData\Local\Temp\jieba.cache
Loading model cost 0.657 seconds.
Prefix dict has been built successfully.
[pair('秦皇岛', 'nr'), pair('氨水', 'n'), pair('制备', 'v'), pair('器', 'n')]
['天津市', None, None]
[pair('天津', 'ns'), pair('奥利', 'nz'), pair('达', 'v'), pair('氨水', 'n'), pair('制备', 'v'), pair('器', 'n')]
['河北省', '张家口市', None]
[pair('张家口', 'nr'), pair('氨水', 'n'), pair('制备', 'v'), pair('器', 'n')]
['[秦皇岛]氨水制备器', '[天津]奥利达氨水制备器', '[张家口]氨水制备器']
"""
